/*	$NetBSD: n_atan2.S,v 1.9 2014/10/10 20:58:09 martin Exp $	*/
/*
 * Copyright (c) 1985, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)atan2.s	8.1 (Berkeley) 6/4/93
 */

#include <machine/asm.h>

/*
 * ATAN2(Y,X)
 * RETURN ARG (X+iY)
 * VAX D FORMAT (56 BITS PRECISION)
 * CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85;
 *
 *
 * Method :
 *	1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
 *	2. Reduce x to positive by (if x and y are unexceptional):
 *		ARG (x+iy) = arctan(y/x)   	   ... if x > 0,
 *		ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
 *	3. According to the integer k=4t+0.25 truncated , t=y/x, the argument
 *	   is further reduced to one of the following intervals and the
 *	   arctangent of y/x is evaluated by the corresponding formula:
 *
 *          [0,7/16]	   atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
 *	   [7/16,11/16]    atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) )
 *	   [11/16.19/16]   atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) )
 *	   [19/16,39/16]   atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) )
 *	   [39/16,INF]     atan(y/x) = atan(INF) + atan( -x/y )
 *
 * Special cases:
 * Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y).
 *
 *	ARG( NAN , (anything) ) is NaN;
 *	ARG( (anything), NaN ) is NaN;
 *	ARG(+(anything but NaN), +-0) is +-0  ;
 *	ARG(-(anything but NaN), +-0) is +-PI ;
 *	ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2;
 *	ARG( +INF,+-(anything but INF and NaN) ) is +-0 ;
 *	ARG( -INF,+-(anything but INF and NaN) ) is +-PI;
 *	ARG( +INF,+-INF ) is +-PI/4 ;
 *	ARG( -INF,+-INF ) is +-3PI/4;
 *	ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2;
 *
 * Accuracy:
 *	atan2(y,x) returns the exact ARG(x+iy) nearly rounded.
 */

#ifdef WEAK_ALIAS
WEAK_ALIAS(atan2f, _atan2f)
#endif

ENTRY(_atan2f, 0)
	cvtfd	4(%ap),-(%sp)
	calls	$2,_C_LABEL(_atan2)
	cvtdf	%r0,%r0
	ret

#ifdef WEAK_ALIAS
WEAK_ALIAS(atan2, _atan2)
WEAK_ALIAS(_atan2l, _atan2)
#endif

ENTRY(_atan2, 0x0fc0)
	movq	4(%ap),%r2		# %r2 = y
	movq	12(%ap),%r4		# %r4 = x
	bicw3	$0x7f,%r2,%r0
	bicw3	$0x7f,%r4,%r1
	cmpw	%r0,$0x8000		# y is the reserved operand
	jeql	resop
	cmpw	%r1,$0x8000		# x is the reserved operand
	jeql	resop
	subl2	$8,%sp
	bicw3	$0x7fff,%r2,-4(%fp)	# copy y sign bit to -4(%fp)
	bicw3	$0x7fff,%r4,-8(%fp)	# copy x sign bit to -8(%fp)
	cmpd	%r4,$0x4080		# x = 1.0 ?
	bneq	xnot1
	movq	%r2,%r0
	bicw2	$0x8000,%r0		# t = |y|
	movq	%r0,%r2			# y = |y|
	jbr	begin
xnot1:
	bicw3	$0x807f,%r2,%r11		# yexp
	jeql	yeq0			# if y=0 goto yeq0
	bicw3	$0x807f,%r4,%r10		# xexp
	jeql	pio2			# if x=0 goto pio2
	subw2	%r10,%r11			# k = yexp - xexp
	cmpw	%r11,$0x2000		# k >= 64 (exp) ?
	jgeq	pio2			# atan2 = +-pi/2
	divd3	%r4,%r2,%r0		# t = y/x  never overflow
	bicw2	$0x8000,%r0		# t > 0
	bicw2	$0xff80,%r2		# clear the exponent of y
	bicw2	$0xff80,%r4		# clear the exponent of x
	bisw2	$0x4080,%r2		# normalize y to [1,2)
	bisw2	$0x4080,%r4		# normalize x to [1,2)
	subw2	%r11,%r4			# scale x so that yexp-xexp=k
begin:
	cmpw	%r0,$0x411c		# t : 39/16
	jgeq	L50
	addl3	$0x180,%r0,%r10		# 8*t
	cvtrfl	%r10,%r10			# [8*t] rounded to int
	ashl	$-1,%r10,%r10		# [8*t]/2
	casel	%r10,$0,$4
L1:
	.word	L20-L1
	.word	L20-L1
	.word	L30-L1
	.word	L40-L1
	.word	L40-L1
L10:
	movq	$0xb4d9940f985e407b,%r6	# Hi=.98279372324732906796d0
	movq	$0x21b1879a3bc2a2fc,%r8	# Lo=-.17092002525602665777d-17
	subd3	%r4,%r2,%r0		# y-x
	addw2	$0x80,%r0		# 2(y-x)
	subd2	%r4,%r0			# 2(y-x)-x
	addw2	$0x80,%r4		# 2x
	movq	%r2,%r10
	addw2	$0x80,%r10		# 2y
	addd2	%r10,%r2			# 3y
	addd2	%r4,%r2			# 3y+2x
	divd2	%r2,%r0			# (2y-3x)/(2x+3y)
	jbr	L60
L20:
	cmpw	%r0,$0x3280		# t : 2**(-28)
	jlss	L80
	clrq	%r6			# Hi=%r6=0, Lo=%r8=0
	clrq	%r8
	jbr	L60
L30:
	movq	$0xda7b2b0d63383fed,%r6	# Hi=.46364760900080611433d0
	movq	$0xf0ea17b2bf912295,%r8	# Lo=.10147340032515978826d-17
	movq	%r2,%r0
	addw2	$0x80,%r0		# 2y
	subd2	%r4,%r0			# 2y-x
	addw2	$0x80,%r4		# 2x
	addd2	%r2,%r4			# 2x+y
	divd2	%r4,%r0 			# (2y-x)/(2x+y)
	jbr	L60
L50:
	movq	$0x68c2a2210fda40c9,%r6	# Hi=1.5707963267948966135d1
	movq	$0x06e0145c26332326,%r8	# Lo=.22517417741562176079d-17
	cmpw	%r0,$0x5100		# y : 2**57
	bgeq	L90
	divd3	%r2,%r4,%r0
	bisw2	$0x8000,%r0 		# -x/y
	jbr	L60
L40:
	movq	$0x68c2a2210fda4049,%r6	# Hi=.78539816339744830676d0
	movq	$0x06e0145c263322a6,%r8	# Lo=.11258708870781088040d-17
	subd3	%r4,%r2,%r0		# y-x
	addd2	%r4,%r2			# y+x
	divd2	%r2,%r0			# (y-x)/(y+x)
L60:
	movq	%r0,%r10
	muld2	%r0,%r0
	polyd	%r0,$12,ptable
	muld2	%r10,%r0
	subd2	%r0,%r8
	addd3	%r8,%r10,%r0
	addd2	%r6,%r0
L80:
	movw	-8(%fp),%r2
	bneq	pim
	bisw2	-4(%fp),%r0		# return sign(y)*%r0
	ret
L90:					# x >= 2**25
	movq	%r6,%r0
	jbr	L80
pim:
	subd3	%r0,$0x68c2a2210fda4149,%r0	# pi-t
	bisw2	-4(%fp),%r0
	ret
yeq0:
	movw	-8(%fp),%r2
	beql	zero			# if sign(x)=1 return pi
	movq	$0x68c2a2210fda4149,%r0	# pi=3.1415926535897932270d1
	ret
zero:
	clrq	%r0			# return 0
	ret
pio2:
	movq	$0x68c2a2210fda40c9,%r0	# pi/2=1.5707963267948966135d1
	bisw2	-4(%fp),%r0		# return sign(y)*pi/2
	ret
resop:
	movq	$0x8000,%r0		# propagate the reserved operand
	ret

	_ALIGN_TEXT
ptable:
	.quad	0xb50f5ce96e7abd60
	.quad	0x51e44a42c1073e02
	.quad	0x3487e3289643be35
	.quad	0xdb62066dffba3e54
	.quad	0xcf8e2d5199abbe70
	.quad	0x26f39cb884883e88
	.quad	0x135117d18998be9d
	.quad	0x602ce9742e883eba
	.quad	0xa35ad0be8e38bee3
	.quad	0xffac922249243f12
	.quad	0x7f14ccccccccbf4c
	.quad	0xaa8faaaaaaaa3faa
	.quad	0x0000000000000000
